Expression profile of gene age categories

If we group certain PS’, do we see an interesting pattern of when these genes are more expressed? PS’ are grouped as follows

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(myTAI)
library(see)

Load data

Load PES

# Non-transformed
Ec_PES_32m <-
  readr::read_csv(file = "data/Ec_PES_32m.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f <-
  readr::read_csv(file = "data/Ec_PES_25f.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# sqrt-tranformed
Ec_PES_32m.sqrt <-
  readr::read_csv(file = "data/Ec_PES_32m.sqrt.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f.sqrt <-
  readr::read_csv(file = "data/Ec_PES_25f.sqrt.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# log2-tranformed
Ec_PES_32m.log2 <-
  readr::read_csv(file = "data/Ec_PES_32m.log2.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f.log2 <-
  readr::read_csv(file = "data/Ec_PES_25f.log2.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# rank-tranformed
Ec_PES_32m.rank <-
  readr::read_csv(file = "data/Ec_PES_32m.rank.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f.rank <-
  readr::read_csv(file = "data/Ec_PES_25f.rank.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# rlog-tranformed
Ec_PES_32m.rlog <-
  readr::read_csv(file = "data/Ec_PES_32m.rlog.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f.rlog <-
  readr::read_csv(file = "data/Ec_PES_25f.rlog.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Plot results

group_PS <- function(PES,  Groups = list(Ancient = c(1:2),
                                         Early = c(3:6), 
                                         Phaeophyceae = 7, 
                                         Recent = c(8:11))){
        if(!myTAI::is.ExpressionSet(PES)) stop("PES is not an ExpressionSet")
        if(length(Groups) != 4) stop("Rewrite the function or have just four groups")
        PES_new <- PES %>%
                mutate(Groups = case_when(
                        Phylostratum %in% Groups[[1]] ~ names(Groups[1]),
                        Phylostratum %in% Groups[[2]] ~ names(Groups[2]),
                        Phylostratum %in% Groups[[3]] ~ names(Groups[3]),
                        Phylostratum %in% Groups[[4]] ~ names(Groups[4]),
                        TRUE ~ NA_character_
                        ))
        return(PES_new)
}
plot_PGroups <- function(grouped_PES, ordered_stages){
        # pivot the dataframe longer for ggplot2
        grouped_PES_filt <- grouped_PES %>% 
                dplyr::select(-Phylostratum) %>% 
                tidyr::pivot_longer(!c(GeneID, Groups), names_to = "Stage", values_to = "Abundance")
        
        # make stages factors
        grouped_PES_filt$Stage <- base::factor(grouped_PES_filt$Stage, ordered_stages)
        
        # plot results
        grouped_PES_filt_plot <- grouped_PES_filt %>%
                ggplot2::ggplot(aes(x = Stage, y = Abundance, colour = Groups)) +
                ggplot2::geom_line(alpha = 0.05, aes(group = GeneID)) +
                ggplot2::facet_grid(~Groups) +
                # ggplot2::geom_boxplot(colour = "black") +
                see::geom_violinhalf(colour = "black", width = 1.2) +
                # ggplot2::stat_summary(fun = "median", colour = "black") +
                # ggplot2::stat_summary(fun.data = "mean_cl_boot", colour = "black", width = .4) +
                ggsci::scale_colour_npg() +
                # ggplot2::scale_x_discrete(guide = ggplot2::guide_axis(n.dodge=2)) +
                ggplot2::theme_classic() +
                ggplot2::theme(legend.position = "none",
                               axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
        
        return(grouped_PES_filt_plot)
}
ordered_stages = c("meiospore", "immGA", "matGA", "oldGA", "gamete", "earlyPSP","immPSP", "matPSP", "mitospore")

# log2(x+1)
Ec_PES_32m.log2 %>% 
        dplyr::select(1,2,ends_with("GA")) %>% group_PS() %>% 
        plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec32m [GA]", y = "log2(TPM+1)")

Ec_PES_32m.log2 %>% 
        dplyr::select(1,2,ends_with("PSP")) %>% group_PS() %>% 
        plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec32m [pSP]", y = "log2(TPM+1)")

Ec_PES_32m.log2 %>% 
        dplyr::select(1,2, contains("spore"), contains("gamete")) %>% group_PS() %>% 
        plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec32m [unicell]", y = "log2(TPM+1)")

Ec_PES_25f.log2 %>%
        dplyr::select(1,2,ends_with("GA")) %>% group_PS() %>%
        plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec25f [GA]", y = "log2(TPM+1)")

Ec_PES_25f.log2 %>%
        dplyr::select(1,2,ends_with("PSP")) %>% group_PS() %>%
        plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec25f [pSP]", y = "log2(TPM+1)")

Ec_PES_25f.log2 %>%
        dplyr::select(1,2, contains("spore"), contains("gamete")) %>% group_PS() %>%
        plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec25f [unicell]", y = "log2(TPM+1)")

# rlog
Ec_PES_32m.rlog %>%
        dplyr::select(1,2,ends_with("GA")) %>% group_PS() %>%
        plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec32m [GA]", y = "rlog(TPM)")

Ec_PES_32m.rlog %>%
        dplyr::select(1,2,ends_with("PSP")) %>% group_PS() %>%
        plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec32m [pSP]", y = "rlog(TPM)")

Ec_PES_32m.rlog %>%
        dplyr::select(1,2, contains("spore"), contains("gamete")) %>% group_PS() %>%
        plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec32m [unicell]", y = "rlog(TPM)")

Ec_PES_25f.rlog %>%
        dplyr::select(1,2,ends_with("GA")) %>% group_PS() %>%
        plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec25f [GA]", y = "rlog(TPM)")

Ec_PES_25f.rlog %>%
        dplyr::select(1,2,ends_with("PSP")) %>% group_PS() %>%
        plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec25f [pSP]", y = "rlog(TPM)")

Ec_PES_25f.rlog %>%
        dplyr::select(1,2, contains("spore"), contains("gamete")) %>%  group_PS() %>%
        plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec25f [unicell]", y = "rlog(TPM)")

Summary

The results are simply not that interesting. Genes from all PS act the same way and have the same underlying distribution, except the most recent PS’ (8 to 11), which has more lowly expressed genes.

Further analysis using denoised data.

Load PES (denoised)

# # Non-transformed
# Ec_PES_32m_denoised <-
#   readr::read_csv(file = "data/Ec_PES_32m_denoised.csv")
# Ec_PES_25f_denoised <-
#   readr::read_csv(file = "data/Ec_PES_25f_denoised.csv")
# 
# # sqrt-tranformed
# Ec_PES_32m.sqrt_denoised <-
#   readr::read_csv(file = "data/Ec_PES_32m.sqrt_denoised.csv")
# Ec_PES_25f.sqrt_denoised <-
#   readr::read_csv(file = "data/Ec_PES_25f.sqrt_denoised.csv")

# log2-tranformed
Ec_PES_32m.log2_denoised <-
  readr::read_csv(file = "data/Ec_PES_32m.log2_denoised.csv")
## Rows: 2719 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f.log2_denoised <-
  readr::read_csv(file = "data/Ec_PES_25f.log2_denoised.csv")
## Rows: 2719 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ordered_stages = c("meiospore", "immGA", "matGA", "oldGA", "gamete", "earlyPSP","immPSP", "matPSP", "mitospore")

# log2(x+1)
Ec_PES_32m.log2_denoised %>% 
        dplyr::select(1,2,ends_with("GA")) %>% group_PS() %>% 
        plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec32m [GA] denoised", y = "log2(TPM+1)")

Ec_PES_32m.log2_denoised %>% 
        dplyr::select(1,2,ends_with("PSP")) %>% group_PS() %>% 
        plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec32m [pSP] denoised", y = "log2(TPM+1)")

Ec_PES_32m.log2_denoised %>% 
        dplyr::select(1,2, contains("spore"), contains("gamete")) %>% group_PS() %>% 
        plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec32m [unicell] denoised", y = "log2(TPM+1)")

Ec_PES_25f.log2_denoised %>%
        dplyr::select(1,2,ends_with("GA")) %>% group_PS() %>%
        plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec25f [GA] denoised", y = "log2(TPM+1)")

Ec_PES_25f.log2_denoised %>%
        dplyr::select(1,2,ends_with("PSP")) %>% group_PS() %>%
        plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec25f [pSP] denoised", y = "log2(TPM+1)")

Ec_PES_25f.log2_denoised %>%
        dplyr::select(1,2, contains("spore"), contains("gamete")) %>% group_PS() %>%
        plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec25f [unicell] denoised", y = "log2(TPM+1)")

Get session info.

devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.2.2 (2022-10-31)
##  os       macOS Big Sur ... 10.16
##  system   x86_64, darwin17.0
##  ui       X11
##  language (EN)
##  collate  en_US.UTF-8
##  ctype    en_US.UTF-8
##  tz       Europe/Berlin
##  date     2023-10-24
##  pandoc   3.1.6.2 @ /usr/local/bin/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package     * version    date (UTC) lib source
##  bit           4.0.5      2022-11-15 [1] CRAN (R 4.2.0)
##  bit64         4.0.5      2020-08-30 [1] CRAN (R 4.2.0)
##  bslib         0.5.1      2023-08-11 [1] CRAN (R 4.2.0)
##  cachem        1.0.8      2023-05-01 [1] CRAN (R 4.2.0)
##  callr         3.7.3      2022-11-02 [1] CRAN (R 4.2.0)
##  cli           3.6.1      2023-03-23 [1] CRAN (R 4.2.0)
##  codetools     0.2-19     2023-02-01 [1] CRAN (R 4.2.0)
##  colorspace    2.1-0      2023-01-23 [1] CRAN (R 4.2.0)
##  crayon        1.5.2      2022-09-29 [1] CRAN (R 4.2.0)
##  devtools      2.4.5      2022-10-11 [1] CRAN (R 4.2.0)
##  digest        0.6.33     2023-07-07 [1] CRAN (R 4.2.0)
##  dplyr       * 1.1.3      2023-09-03 [1] CRAN (R 4.2.0)
##  ellipsis      0.3.2      2021-04-29 [1] CRAN (R 4.2.0)
##  evaluate      0.22       2023-09-29 [1] CRAN (R 4.2.2)
##  fansi         1.0.5      2023-10-08 [1] CRAN (R 4.2.2)
##  farver        2.1.1      2022-07-06 [1] CRAN (R 4.2.0)
##  fastmap       1.1.1      2023-02-24 [1] CRAN (R 4.2.0)
##  forcats     * 1.0.0      2023-01-29 [1] CRAN (R 4.2.0)
##  foreach       1.5.2      2022-02-02 [1] CRAN (R 4.2.0)
##  fs            1.6.3      2023-07-20 [1] CRAN (R 4.2.0)
##  generics      0.1.3      2022-07-05 [1] CRAN (R 4.2.0)
##  ggplot2     * 3.4.4      2023-10-12 [1] CRAN (R 4.2.2)
##  ggsci         3.0.0      2023-03-08 [1] CRAN (R 4.2.0)
##  glue          1.6.2      2022-02-24 [1] CRAN (R 4.2.0)
##  gtable        0.3.4      2023-08-21 [1] CRAN (R 4.2.0)
##  hms           1.1.3      2023-03-21 [1] CRAN (R 4.2.0)
##  htmltools     0.5.6.1    2023-10-06 [1] CRAN (R 4.2.2)
##  htmlwidgets   1.6.2      2023-03-17 [1] CRAN (R 4.2.0)
##  httpuv        1.6.11     2023-05-11 [1] CRAN (R 4.2.2)
##  insight       0.19.6     2023-10-12 [1] CRAN (R 4.2.2)
##  iterators     1.0.14     2022-02-05 [1] CRAN (R 4.2.0)
##  jquerylib     0.1.4      2021-04-26 [1] CRAN (R 4.2.0)
##  jsonlite      1.8.7      2023-06-29 [1] CRAN (R 4.2.0)
##  knitr         1.44       2023-09-11 [1] CRAN (R 4.2.0)
##  labeling      0.4.3      2023-08-29 [1] CRAN (R 4.2.0)
##  later         1.3.1      2023-05-02 [1] CRAN (R 4.2.2)
##  lattice       0.21-9     2023-10-01 [1] CRAN (R 4.2.2)
##  lifecycle     1.0.3      2022-10-07 [1] CRAN (R 4.2.0)
##  lubridate   * 1.9.3      2023-09-27 [1] CRAN (R 4.2.0)
##  magrittr      2.0.3      2022-03-30 [1] CRAN (R 4.2.0)
##  Matrix        1.5-4.1    2023-05-18 [1] CRAN (R 4.2.0)
##  memoise       2.0.1      2021-11-26 [1] CRAN (R 4.2.0)
##  mime          0.12       2021-09-28 [1] CRAN (R 4.2.0)
##  miniUI        0.1.1.1    2018-05-18 [1] CRAN (R 4.2.0)
##  munsell       0.5.0      2018-06-12 [1] CRAN (R 4.2.0)
##  myTAI       * 1.0.1.9000 2023-10-03 [1] Github (drostlab/myTAI@e159136)
##  pillar        1.9.0      2023-03-22 [1] CRAN (R 4.2.0)
##  pkgbuild      1.4.2      2023-06-26 [1] CRAN (R 4.2.0)
##  pkgconfig     2.0.3      2019-09-22 [1] CRAN (R 4.2.0)
##  pkgload       1.3.3      2023-09-22 [1] CRAN (R 4.2.0)
##  prettyunits   1.2.0      2023-09-24 [1] CRAN (R 4.2.0)
##  processx      3.8.2      2023-06-30 [1] CRAN (R 4.2.0)
##  profvis       0.3.8      2023-05-02 [1] CRAN (R 4.2.0)
##  promises      1.2.1      2023-08-10 [1] CRAN (R 4.2.2)
##  ps            1.7.5      2023-04-18 [1] CRAN (R 4.2.0)
##  purrr       * 1.0.2      2023-08-10 [1] CRAN (R 4.2.2)
##  R6            2.5.1      2021-08-19 [1] CRAN (R 4.2.0)
##  Rcpp          1.0.11     2023-07-06 [1] CRAN (R 4.2.0)
##  readr       * 2.1.4      2023-02-10 [1] CRAN (R 4.2.0)
##  remotes       2.4.2.1    2023-07-18 [1] CRAN (R 4.2.2)
##  rlang         1.1.1      2023-04-28 [1] CRAN (R 4.2.0)
##  rmarkdown     2.25       2023-09-18 [1] CRAN (R 4.2.2)
##  rstudioapi    0.15.0     2023-07-07 [1] CRAN (R 4.2.0)
##  sass          0.4.7      2023-07-15 [1] CRAN (R 4.2.0)
##  scales        1.2.1      2022-08-20 [1] CRAN (R 4.2.0)
##  see         * 0.8.0      2023-06-05 [1] CRAN (R 4.2.0)
##  sessioninfo   1.2.2      2021-12-06 [1] CRAN (R 4.2.0)
##  shiny         1.7.5.1    2023-10-14 [1] CRAN (R 4.2.2)
##  stringi       1.7.12     2023-01-11 [1] CRAN (R 4.2.0)
##  stringr     * 1.5.0      2022-12-02 [1] CRAN (R 4.2.0)
##  tibble      * 3.2.1      2023-03-20 [1] CRAN (R 4.2.0)
##  tidyr       * 1.3.0      2023-01-24 [1] CRAN (R 4.2.0)
##  tidyselect    1.2.0      2022-10-10 [1] CRAN (R 4.2.0)
##  tidyverse   * 2.0.0      2023-02-22 [1] CRAN (R 4.2.0)
##  timechange    0.2.0      2023-01-11 [1] CRAN (R 4.2.0)
##  tzdb          0.4.0      2023-05-12 [1] CRAN (R 4.2.2)
##  urlchecker    1.0.1      2021-11-30 [1] CRAN (R 4.2.0)
##  usethis       2.2.2      2023-07-06 [1] CRAN (R 4.2.0)
##  utf8          1.2.3      2023-01-31 [1] CRAN (R 4.2.0)
##  vctrs         0.6.4      2023-10-12 [1] CRAN (R 4.2.2)
##  vroom         1.6.4      2023-10-02 [1] CRAN (R 4.2.2)
##  withr         2.5.1      2023-09-26 [1] CRAN (R 4.2.0)
##  xfun          0.40       2023-08-09 [1] CRAN (R 4.2.2)
##  xtable        1.8-4      2019-04-21 [1] CRAN (R 4.2.0)
##  yaml          2.3.7      2023-01-23 [1] CRAN (R 4.2.0)
## 
##  [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library
## 
## ──────────────────────────────────────────────────────────────────────────────